library(yaml) 
library(foreign)
library(data.table)
library(tidyverse)
library(readstata13)

### Load paths
# The global configuration is read once; every path below is taken from it.
CONFIG        <- yaml.load_file("config_global.yaml")
external_path <- CONFIG$source$externals$orig$root
build_path    <- CONFIG$build$prepare_data
lib_path      <- CONFIG$source$lib
helpers_path  <- CONFIG$source$prepare_data$helpers

### Load function(s)
# Helper scripts are sourced from the library and helpers directories named in the config.
source(sprintf("%s/library.r",          lib_path))
source(sprintf("%s/load_raw_data.r",    helpers_path))
source(sprintf("%s/map.r",              helpers_path))
source(sprintf("%s/country_cleaning.r", helpers_path))

# Build the cleaned data series for every entry of `country_list`.
#
# For each entry: optionally load the cumulative raw data, prepare the codebook
# maps, clean every year listed in the maps (country-specific step followed by
# the generalized step), stack the yearly results and hand them to save_data().
# Reads the file-level `external_path` and `build_path`; returns nothing useful.
main <- function() {

  # Some countries appear twice; the second entry is an alternative series:
  #   - "germany":       the second one is for East Germany
  #   - "switzerland":   the second one is for the alternative coding scheme
  #   - "united_states": the second one is for no web
  country_list <- c("australia", "britain", "canada", "denmark", "france", "germany", "germany", "japan",
                    "new_zealand", "norway", "sweden", "switzerland", "switzerland",  "united_states", "united_states", "cses")

  # Positions of the alternative entries in `country_list`.
  # save_data() recognises the same positions (7, 13, 15), so keep them in sync.
  germany_alt_idx       <- 7L
  switzerland_alt_idx   <- 13L
  united_states_alt_idx <- 15L

  for (country_idx in seq_along(country_list)) {
    country        <- country_list[country_idx]
    is_germany_alt <- country_idx == germany_alt_idx
    is_cleaning_alt <- country_idx == switzerland_alt_idx || country_idx == united_states_alt_idx

    ### Load cumulative data
    if (country %in% c("united_states", "germany", "switzerland", "cses")) {
      db_all <- load_raw_data_all_years(external_path, country, alt = is_germany_alt)
    } else {
      db_all <- NULL
    }

    ### Prepare codebook maps
    # Only "cses" passes the years found in its cumulative data; all others pass NULL.
    map_years <- if (country == "cses") unique(db_all$year) else NULL
    maps      <- prepare_maps(country, years = map_years, alt = is_germany_alt)
    years     <- maps$years

    # Fail with a clear message instead of looping over c(1, 0) on an empty vector.
    if (length(years) == 0L) {
      stop(sprintf("prepare_maps() returned no years for '%s' (entry %d of country_list)",
                   country, country_idx),
           call. = FALSE)
    }

    ### Prepare data for each year
    yearly <- vector("list", length(years))
    for (i in seq_along(years)) {

      ## Extract variable names and values for specific year
      yr      <- years[i]
      maps_yr <- clean_maps_by_year(country, yr, maps)

      # Load data by year or subset by year
      db <- load_raw_data_by_year(yr, maps$format, i, external_path, country, db_all)

      ## Country-specific cleaning steps
      db <- country_cleaning(db, yr, country, maps_yr$varname_yr, maps_yr$parties_yr, maps_yr$thermo_vars_yr,
                             alt = is_cleaning_alt) # Switzerland (13) and US (15) alternatives

      ## Generalized cleaning and formatting
      # Unmapped thermometer answers are kept for Canada, the United States,
      # Japan and Norway before 1997, and turned into NA everywhere else.
      keeps_other_answers <- country %in% c("canada", "united_states", "japan") ||
        (country == "norway" && yr < 1997)
      yearly[[i]] <- general_cleaning(yr, db, maps_yr$varname_yr, maps_yr$parties_yr,
                                      maps_yr$thermo_vars_yr, maps_yr$thermo_labs_yr, maps_yr$thermo_vals_yr,
                                      maps_yr$ideo_labs_yr, maps_yr$ideo_vals_yr,
                                      drop_others = !keeps_other_answers)
    }

    ## Bind all yearly rows in one step instead of growing a data frame inside the loop
    data <- do.call(rbind, yearly)

    ### Save
    save_data(data, build_path, country, country_idx)
  }
}

# Apply the country-specific cleaning step to one year of raw data.
#
# Looks up the function named `clean_<country>` and calls it with the arguments
# received here. "cses" and "germany" have no country-specific step, so `db` is
# returned unchanged for them.
#
# Args:
#   db, yr, country, varname_yr, parties_yr, thermo_vars_yr: passed through
#     untouched to `clean_<country>()`.
#   alt: flag forwarded to `clean_<country>()` as `alt`.
# Returns: whatever `clean_<country>()` returns, or `db` itself for "cses"/"germany".
# Raises: an error when no function called `clean_<country>` can be found.
country_cleaning <- function(db, yr, country, varname_yr, parties_yr, thermo_vars_yr, alt = FALSE) {
  if (country %in% c("cses", "germany")) {
    return(db)
  }

  # Resolve the cleaner by name rather than building and evaluating code from a string.
  cleaner_name <- sprintf("clean_%s", country)
  if (!exists(cleaner_name, mode = "function")) {
    stop(sprintf("No country-specific cleaning function `%s()` is defined", cleaner_name),
         call. = FALSE)
  }
  cleaner <- get(cleaner_name, mode = "function")

  cleaner(db, yr, country, varname_yr, parties_yr, thermo_vars_yr, alt = alt)
}

# Generalized cleaning: turn one year of raw survey data into the common layout.
#
# Args:
#   yr:             value stored in the `year` column of the result.
#   db:             raw data for that year; columns are selected with `db[, name]`.
#   varname_yr:     holds `party_var`, `leaner_var`, `ideo_var` and `weight_var`,
#                   the names of the corresponding columns of `db`. A name that is
#                   NULL, NA or the string "NA" means the variable is not available.
#   parties_yr, thermo_vars_yr, thermo_labs_yr, thermo_vals_yr, ideo_labs_yr,
#   ideo_vals_yr:   codebook map rows; the first two columns are dropped and the
#                   rest are used as party codes, thermometer column names ("XXX"
#                   marks a party without a thermometer), thermometer labels/values
#                   and ideology labels/values.
#   drop_others:    if TRUE, thermometer answers that are not among the mapped
#                   labels become NA.
# Returns: a data.frame with columns `party`, `leaner`, `has_leaner`, `weight`,
#   `feel_party<i>` (one per party), `ideology` and `year`, restricted to rows
#   with a non-missing, strictly positive weight.
general_cleaning <- function(yr, db, varname_yr, parties_yr, thermo_vars_yr, thermo_labs_yr, thermo_vals_yr, ideo_labs_yr, ideo_vals_yr, 
                      drop_others = FALSE){

  # TRUE when a variable name from the map is unavailable: NULL, empty, NA or "NA".
  # One shared check keeps the leaner, weight and ideology branches consistent
  # and avoids `if (logical(0))` errors when an entry is missing from `varname_yr`.
  is_unset <- function(var_name) {
    is.null(var_name) || length(var_name) == 0L ||
      is.na(var_name[1]) || as.character(var_name[1]) == "NA"
  }

  # Extract variables
  party_var    <- varname_yr$party_var
  leaner_var   <- varname_yr$leaner_var
  ideo_var     <- varname_yr$ideo_var
  weight_var   <- varname_yr$weight_var
  parties      <- parties_yr[, -c(1:2)]
  thermo_vars  <- as.character(thermo_vars_yr[, -c(1:2)])
  thermo_labs  <- thermo_labs_yr[, -c(1:2)]
  thermo_vals  <- as.numeric(thermo_vals_yr[, -c(1:2)])
  ideo_labs    <- ideo_labs_yr[, -c(1:2)]
  ideo_vals    <- as.character(ideo_vals_yr[, -c(1:2)])
  n_parties    <- length(parties)

  # If thermometers are all numeric already (e.g., Norway pre-1997), then just leave as-is.
  # NOTE(review): the literal 11 is presumably the number of thermometer label
  # columns in the map -- confirm before changing the map layout.
  if (sum(thermo_labs == "XXX", na.rm = TRUE) == 11) {
    thermo_labs <- thermo_vals <- NULL
  }

  # Initialize the dataframe and input the parties
  new_db <- data.frame(party = as.character(db[, party_var]))

  # Leaner variable (all NA when the survey has none)
  if (is_unset(leaner_var)) {
    new_db$leaner <- NA
  } else {
    new_db$leaner <- as.character(db[, leaner_var])
  }
  new_db$has_leaner <- !all(is.na(new_db$leaner))

  # Weight variable (everyone gets weight 1 when the survey has none)
  if (is_unset(weight_var)) {
    new_db$weight <- 1
  } else {
    new_db$weight <- as.numeric(as.character(db[, weight_var]))
  }

  # Enumerate the parties, which all the following numberings will follow
  # https://stackoverflow.com/questions/17842705/creating-a-named-list-from-two-vectors-names-values
  party_codes <- setNames(seq_len(n_parties), parties)

  # Revalue party codings
  new_db$party[!(as.character(new_db$party) %in% parties)] <- NA # Replace other parties with NA
  new_db$party <- revalue(as.character(new_db$party), party_codes) # Ignore the warning: The following `from` values were not present in `x`: XXX

  if (any(new_db$has_leaner)) {
    # Revalue leaner codings
    new_db$leaner[!(as.character(new_db$leaner) %in% parties)] <- NA # Replace other parties with NA
    new_db$leaner <- revalue(as.character(new_db$leaner), party_codes) # Ignore the warning: The following `from` values were not present in `x`: XXX
  }

  # Create party-thermometer variables
  for (i in seq_len(n_parties)) {
    if (thermo_vars[i] == "XXX") {
      # No thermometer was asked for this party
      temp <- NA
    } else {
      raw_answers <- as.character(db[, thermo_vars[i]])
      temp <- revalue(raw_answers, setNames(thermo_vals, thermo_labs))
      if (drop_others) {
        temp[!(raw_answers %in% thermo_labs)] <- NA
      }
    }
    new_db[, sprintf("feel_party%s", i)] <- as.numeric(as.character(temp))
  }

  # Recode Ideology
  if (is_unset(ideo_var)) {
    new_db$ideology <- NA
  } else {
    new_db$ideology <- revalue(as.character(db[, ideo_var]), setNames(ideo_vals, ideo_labs))
    new_db$ideology <- as.numeric(as.character(new_db$ideology))
  }

  # Drop individuals without a weight or with a zero weight and assign year
  has_positive_weight <- !is.na(new_db$weight) & new_db$weight > 0
  new_db      <- new_db[has_positive_weight, ]
  new_db$year <- yr

  new_db
}

# Write one country's stacked data to a CSV file under `build_path`.
#
# `c` is the position of the entry in the country list. Positions 7, 13 and 15
# are the alternative series and are written to "<country>_alt.csv"; every other
# position is written to "<country>.csv".
save_data <- function(data, build_path, country, c){
  is_alt_series <- c %in% c(7, 13, 15)

  if (is_alt_series && country == "germany") {
    # No East Germany respondents gave a party identification in 1990
    data <- subset(data, year != 1990)
  }

  path_template <- if (is_alt_series) "%s/%s_alt.csv" else "%s/%s.csv"
  write.csv(data, sprintf(path_template, build_path, country), row.names = FALSE, quote = TRUE)
}


### Execute
# Top-level call: the whole pipeline runs as soon as this script is run or sourced.
main()
